Data used is: https://archive.ics.uci.edu/dataset/352/online+retail
Information about dataset : This is a transactional data set which contains all the transactions occurring between 01/12/2010 and 09/12/2011 for a UK-based and registered non-store online retail.The company mainly sells unique all-occasion gifts. Many customers of the company are wholesalers.
Corelation: This analysis can be performed on Kada per day data generated by CermePOS. Will be used for product placement on shelves.
## # A tibble: 6 × 8
## InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country
## <chr> <chr> <chr> <dbl> <dttm> <dbl> <dbl> <chr>
## 1 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 2010-12-01 08:26:00 2.55 17850 United Kingdom
## 2 536365 71053 WHITE METAL LANTERN 6 2010-12-01 08:26:00 3.39 17850 United Kingdom
## 3 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 2010-12-01 08:26:00 2.75 17850 United Kingdom
## 4 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 2010-12-01 08:26:00 3.39 17850 United Kingdom
## 5 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 2010-12-01 08:26:00 3.39 17850 United Kingdom
## 6 536365 22752 SET 7 BABUSHKA NESTING BOXES 2 2010-12-01 08:26:00 7.65 17850 United Kingdom
library(dplyr)
best_selling_products <- data %>%
group_by(Description) %>%
summarise(Total_Quantity = sum(Quantity, na.rm = TRUE)) %>%
arrange(desc(Total_Quantity))
head(best_selling_products, 50)
## # A tibble: 50 × 2
## Description Total_Quantity
## <chr> <dbl>
## 1 WORLD WAR 2 GLIDERS ASSTD DESIGNS 53847
## 2 JUMBO BAG RED RETROSPOT 47363
## 3 ASSORTED COLOUR BIRD ORNAMENT 36381
## 4 POPCORN HOLDER 36334
## 5 PACK OF 72 RETROSPOT CAKE CASES 36039
## 6 WHITE HANGING HEART T-LIGHT HOLDER 35317
## 7 RABBIT NIGHT LIGHT 30680
## 8 MINI PAINT SET VINTAGE 26437
## 9 PACK OF 12 LONDON TISSUES 26315
## 10 PACK OF 60 PINK PAISLEY CAKE CASES 24753
## # ℹ 40 more rows
ggplot(best_selling_products[1:10, ], aes(x = reorder(Description, Total_Quantity), y = Total_Quantity)) +
#geom_point()+
geom_bar(stat = "identity", fill = "#9c9797", colour ="#0cf00c") +
coord_flip() +
labs(title = "Top 20 Best Selling Products", x = "Product", y = "Total Quantity Sold")
p <- ggplot(best_selling_products[1:10, ], aes(x = factor(Description), y = Total_Quantity, fill = Description)) +
geom_bar(stat = "identity") +
labs(title = "Top 10 Best Selling Products",
x = "Product",
y = "Total Quantity Sold",
fill = "Product") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Convert to Plotly
ggplotly(p)
library(lubridate)
library(dplyr)
best_selling_time <- data %>%
mutate(Hour = hour(InvoiceDate)) %>%
group_by(Hour, Description) %>%
summarise(Total_Quantity = sum(Quantity, na.rm = TRUE), .groups = "drop") %>%
filter(Total_Quantity > 0) %>%
arrange(desc(Total_Quantity))
head(best_selling_time, 50)
## # A tibble: 50 × 3
## Hour Description Total_Quantity
## <int> <chr> <dbl>
## 1 15 ASSTD DESIGN 3D PAPER STICKERS 12677
## 2 12 WORLD WAR 2 GLIDERS ASSTD DESIGNS 12213
## 3 10 WORLD WAR 2 GLIDERS ASSTD DESIGNS 8647
## 4 12 JUMBO BAG RED RETROSPOT 7567
## 5 11 JUMBO BAG RED RETROSPOT 7539
## 6 10 SMALL POPCORN HOLDER 7334
## 7 12 PACK OF 72 RETROSPOT CAKE CASES 7191
## 8 10 JUMBO BAG RED RETROSPOT 7080
## 9 13 WORLD WAR 2 GLIDERS ASSTD DESIGNS 7056
## 10 12 WHITE HANGING HEART T-LIGHT HOLDER 7003
## # ℹ 40 more rows
tail(best_selling_time, 50)
## # A tibble: 50 × 3
## Hour Description Total_Quantity
## <int> <chr> <dbl>
## 1 20 NO SINGING METAL SIGN 1
## 2 20 NUMBER TILE COTTAGE GARDEN No 1
## 3 20 OPEN CLOSED METAL SIGN 1
## 4 20 OVAL WALL MIRROR DIAMANTE 1
## 5 20 PACK OF 20 NAPKINS PANTRY DESIGN 1
## 6 20 PACK OF 20 SPACEBOY NAPKINS 1
## 7 20 PACK OF 60 MUSHROOM CAKE CASES 1
## 8 20 PING! MICROWAVE PLATE 1
## 9 20 PINK PAISLEY SQUARE TISSUE BOX 1
## 10 20 PINK PAPER PARASOL 1
## # ℹ 40 more rows
range(best_selling_time$Hour)
## [1] 6 20
# Get the best-selling product for each hour
best_selling_per_hour <- best_selling_time %>%
group_by(Hour) %>%
slice_max(order_by = Total_Quantity, n = 1) # Selects the top product per hour
head(best_selling_per_hour, 50)
## # A tibble: 15 × 3
## # Groups: Hour [15]
## Hour Description Total_Quantity
## <int> <chr> <dbl>
## 1 6 DOG BOWL VINTAGE CREAM 1
## 2 7 RABBIT NIGHT LIGHT 912
## 3 8 WHITE HANGING HEART T-LIGHT HOLDER 2553
## 4 9 PACK OF 72 RETROSPOT CAKE CASES 5247
## 5 10 WORLD WAR 2 GLIDERS ASSTD DESIGNS 8647
## 6 11 JUMBO BAG RED RETROSPOT 7539
## 7 12 WORLD WAR 2 GLIDERS ASSTD DESIGNS 12213
## 8 13 WORLD WAR 2 GLIDERS ASSTD DESIGNS 7056
## 9 14 JUMBO BAG RED RETROSPOT 4750
## 10 15 ASSTD DESIGN 3D PAPER STICKERS 12677
## 11 16 JUMBO BAG RED RETROSPOT 3504
## 12 17 POPCORN HOLDER 3386
## 13 18 POPCORN HOLDER 4811
## 14 19 LETTER SHAPE PENCIL SHARPENER 1600
## 15 20 HOMEMADE JAM SCENTED CANDLES 696
# Plot
b <- ggplot(best_selling_per_hour, aes(x = factor(Hour), y = Total_Quantity, fill = Description)) +
geom_bar(stat = "identity") +
labs(title = "Best Selling Product for Each Hour",
x = "Hour of the Day",
y = "Total Quantity Sold",
fill = "Product") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(b)
# Get the top 5 best-selling products for each hour
top_5_per_hour <- best_selling_time %>%
group_by(Hour) %>%
slice_max(order_by = Total_Quantity, n = 5) %>%
ungroup()
head(top_5_per_hour, 50)
## # A tibble: 50 × 3
## Hour Description Total_Quantity
## <int> <chr> <dbl>
## 1 6 DOG BOWL VINTAGE CREAM 1
## 2 7 RABBIT NIGHT LIGHT 912
## 3 7 JUMBO BAG PINK POLKADOT 400
## 4 7 SOMBRERO 400
## 5 7 HOT WATER BOTTLE KEEP CALM 360
## 6 7 MINI PAINT SET VINTAGE 360
## 7 8 WHITE HANGING HEART T-LIGHT HOLDER 2553
## 8 8 JUMBO BAG RED RETROSPOT 1880
## 9 8 HEART OF WICKER SMALL 1744
## 10 8 MINI PAINT SET VINTAGE 1584
## # ℹ 40 more rows
# Plot
a <- ggplot(top_5_per_hour, aes(x = reorder(Description, Total_Quantity),
y = Total_Quantity, fill = Description)) +
geom_bar(stat = "identity", show.legend = FALSE) +
facet_wrap(~ Hour, scales = "free_y") + # Creates a grid of plots by Hour
coord_flip() + # Flips bars for better readability
labs(title = "Top 5 Best-Selling Products for Each Hour",
x = "Product",
y = "Total Quantity Sold") +
theme_minimal()
ggplotly(a)
library(dplyr)
library(ggplot2)
library(plotly)
library(lubridate)
# Ensure Quantity is numeric
data <- data %>%
mutate(Quantity = as.numeric(Quantity))
# Best selling products based on the day of the week
best_selling_day <- data %>%
mutate(Weekday = weekdays(InvoiceDate)) %>%
group_by(Weekday, Description) %>%
summarise(Total_Quantity = sum(Quantity, na.rm = TRUE), .groups = "drop") %>%
arrange(desc(Total_Quantity))
# Check results
head(best_selling_day, 10)
## # A tibble: 10 × 3
## Weekday Description Total_Quantity
## <chr> <chr> <dbl>
## 1 Thursday WORLD WAR 2 GLIDERS ASSTD DESIGNS 18051
## 2 Friday ASSTD DESIGN 3D PAPER STICKERS 12793
## 3 Thursday ASSORTED COLOUR BIRD ORNAMENT 11409
## 4 Thursday JUMBO BAG RED RETROSPOT 11283
## 5 Wednesday WORLD WAR 2 GLIDERS ASSTD DESIGNS 10315
## 6 Tuesday WHITE HANGING HEART T-LIGHT HOLDER 9954
## 7 Wednesday JUMBO BAG RED RETROSPOT 9934
## 8 Thursday MINI PAINT SET VINTAGE 9348
## 9 Wednesday BROCADE RING PURSE 9154
## 10 Tuesday JUMBO BAG RED RETROSPOT 8970
# Visualization
c <- ggplot(best_selling_day, aes(x = factor(Weekday, levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")),
y = Total_Quantity, fill = Description)) +
geom_bar(stat = "identity") +
labs(title = "Best Selling Product for Each Weekday",
x = "Weekday",
y = "Total Quantity Sold",
fill = "Product") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Convert to interactive plot
ggplotly(c)
library(arules)
library(arulesViz)
library(dplyr)
library(ggplot2)
# Convert data to transactions format
transactions <- as(split(data$Description, data$InvoiceNo), "transactions")
## Warning in asMethod(object): removing duplicated items in transactions
# Apply Apriori Algorithm
rules <- apriori(transactions, parameter = list(support = 0.01, confidence = 0.3))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen maxlen target ext
## 0.3 0.1 1 none FALSE TRUE 5 0.01 1 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 259
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[4211 item(s), 25900 transaction(s)] done [0.10s].
## sorting and recoding items ... [590 item(s)] done [0.01s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 done [0.02s].
## writing ... [681 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Convert rules to a tidy data frame
rules_df <- DATAFRAME(rules, separate = TRUE) # Explicit conversion
# Extract only the top 10 rules based on lift
top_rules <- rules_df %>%
arrange(desc(lift)) %>%
head(10)
# Plot using ggplot2
d <- ggplot(top_rules, aes(x = reorder(LHS, lift), y = lift, fill = RHS)) +
geom_bar(stat = "identity") +
coord_flip() + # Flip for better readability
labs(title = "Top 10 Association Rules by Lift",
x = "LHS (Antecedent)",
y = "Lift",
fill = "RHS (Consequent)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(d)